library(foreign)
library(MASS)
library(stargazer)
library(ggplot2)
library(doBy)
library(plm)
library(plyr)
library(survival)
library(DataCombine)
library(lme4)
library(interplot)
library(lfe)
library(xtable)

#Import data
setwd("~/Dropbox/NLSC/Caste conflict/PRQ_Cond_Accept/")
ind.dat <- read.dta("CasteCrimeV7.dta")
summary(ind.dat)



###################
###################
###Main Analysis###
###################
###################

############
###Models###
############

#######Baseline
ev.1.i <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF+t+t2|fid2|0|fid2,
               data=ind.dat)
summary(ev.1.i)

#######Medium
ev.2.i <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + tempNew+
                 t+t2|fid2|0|fid2,
               data=ind.dat)
summary(ev.2.i)

#######Full lag DV
ev.3.i <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                 laglogviol + STreserved + SCreserved + tempNew +
                 t+t2|fid2|0|fid2,
               data=ind.dat)
summary(ev.3.i)


###Export to LaTex
stargazer(ev.1.i, ev.2.i, ev.3.i)


###########################
###Plot the Interactions###
###########################

#Run model
ev.3.ir <- lm(logviol ~ prec_shock_N + c_enop + prec_shock_N*c_enop + logNLMean +
                laglogviol + STreserved + SCreserved + tempNew +
                t+t2+factor(fid2), data=ind.dat)
###Plot first interaction
setwd("~/Google Drive/Caste conflict/")
jpeg("intfull.jpeg", width = 6, height = 6, units = 'in', res = 500)
interplot(m = ev.3.ir, var1 = "prec_shock_N", var2 = "c_enop")+
  # Add labels for X and Y axes
  xlab("Electoral Competitiveness") +
  ylab("Estimated Coefficient for\nDrought") +
  # Change the background
  theme_bw() +
  theme(plot.title = element_text(face="bold")) +
  # Add a horizontal line at y = 0
  geom_hline(yintercept = 0, linetype = "dashed")
dev.off()


###Plot second interaction
setwd("~/Google Drive/Caste conflict/")
jpeg("intfull2.jpeg", width = 6, height = 6, units = 'in', res = 500)
interplot(m = ev.3.ir, var1 = "c_enop", var2 = "prec_shock_N")+
  # Add labels for X and Y axes
  xlab("Drought") +
  ylab("Estimated Coefficient for\nElectoral Competitiveness") +
  # Change the background
  theme_bw() +
  theme(plot.title = element_text(face="bold")) +
  # Add a horizontal line at y = 0
  geom_hline(yintercept = 0, linetype = "dashed")
dev.off()



#######################
#######################
###Robustness Models###
#######################
#######################

################
###Population###
################
ev.3.i.pop <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + logpopseries +
                 laglogviol + STreserved + SCreserved + tempNew +
                 t+t2|fid2|0|fid2,
               data=ind.dat)
summary(ev.3.i.pop)

##################
###NO FE Models###
##################

###Add non-time varying controls
ev.3.i.nf <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + logpopseries +laglogviol+
                    logmeanelevation + logdistcapital + logdistborder + STreserved + SCreserved + caste_per + tribe_per 
                  + tempNew + t+t2|0|0|fid2,
                  data=ind.dat)
summary(ev.3.i.nf)

###Historical party rates
#Normalize party concentration by turnout (cannot use fid2 FEs)
ind.dat$PNegXHist <- ind.dat$histm_enop*ind.dat$prec_shock_N
#Run model
ev.3.i.h <- felm(logviol ~ prec_shock_N + histm_enop + PNegXHist + logNLMean + 
                   laglogviol + STreserved + SCreserved + tempNew +
                   t+t2|0|0|fid2,
                 data=ind.dat)
summary(ev.3.i.h)


####################################################
###Normalize the DV by proportions of SCs and STs###
####################################################
#Create normalized DV
ind.dat$viol_norm_scst <- ind.dat$viol/(ind.dat$tot_sc+ind.dat$tot_st)
summary(ind.dat$viol_norm_scst)
ind.dat$logviol_norm_scst <- log(ind.dat$viol_norm_scst+1)
#Create normalized DV lag
ind.dat$lagviol_norm_scst <- ind.dat$lagviol/(ind.dat$tot_sc+ind.dat$tot_st)
summary(ind.dat$lagviol_norm_scst)
ind.dat$loglagviol_norm_scst <- log(ind.dat$lagviol_norm_scst+1)
#Run model
ev.3.i.norm <- felm(logviol_norm_scst ~ prec_shock_N + c_enop + PNegXIHF + logNLMean +
                      loglagviol_norm_scst + STreserved + SCreserved + tempNew +
                      t+t2|fid2|0|fid2,
                    data=ind.dat)
summary(ev.3.i.norm)

###################
##Hate Crimes DV###
###################
ind.dat$logcivilrights_act <- log(ind.dat$civilrights_act+1)
ind.dat.s <- slide(ind.dat, Var="logcivilrights_act", TimeVar="year", GroupVar="fid2", NewVar="laglogcivilrights_act", slideBy = -1,
                   keepInvalid = FALSE, reminder = TRUE)
#run model
ev.3.i.cra <- felm(logcivilrights_act ~ prec_shock_N + c_enop + PNegXIHF + logNLMean +
                     laglogcivilrights_act + STreserved + SCreserved + tempNew +
                     t+t2|fid2|0|fid2,
                   data=ind.dat.s)
summary(ev.3.i.cra)


#################
###Spatial Lags##
#################

###Mean Spl attacks
ev.3.i.lag.mean <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                          laglogviol + n_mean_viol + STreserved + SCreserved + tempNew +
                          t+t2|fid2|0|fid2,
                        data=ind.dat)
summary(ev.3.i.lag.mean)
###Max Spl attacks
ev.3.i.lag.max <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                         laglogviol + n_max_viol + STreserved + SCreserved + tempNew +
                         t+t2|fid2|0|fid2,
                       data=ind.dat)
summary(ev.3.i.lag.max)
###Min Spl attacks
ev.3.i.lag.min <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                         laglogviol + n_min_viol + STreserved + SCreserved + tempNew +
                         t+t2|fid2|0|fid2,
                       data=ind.dat)
summary(ev.3.i.lag.min)


###################################
##Tests of IV Operationalization###
###################################

###High competition models
#Create a binary variable of high competition (five normalized share or more)
ind.dat$high_comp <- ifelse(ind.dat$c_enop>=5,1,0)
table(ind.dat$high_comp)
##Create interaction terms 
#Drought
ind.dat$PNegXIHF5 <- ind.dat$prec_shock_N*ind.dat$high_comp
#Run model
ev.3.i.hc <- felm(logviol ~ prec_shock_N + high_comp + PNegXIHF5 + logNLMean +
                    laglogviol + STreserved + SCreserved + tempNew +
                    t+t2|fid2|0|fid2,
                  data=ind.dat)
summary(ev.3.i.hc)

###Lower drought threshold (1 SD)
#Create shock variable
ind.dat$PNeg1sd <- ifelse(ind.dat$prec_dep<(-1),1,0)
table(ind.dat$PNeg1sd)
#Create interaction
ind.dat$PNeg1sdXIHF <- ind.dat$PNeg1sd*ind.dat$c_enop
#Run model
ev.3.i.1sd <- felm(logviol ~ PNeg1sd + c_enop + PNeg1sdXIHF + logNLMean + 
                     laglogviol + STreserved + SCreserved + tempNew +
                     t+t2|fid2|0|fid2,
                   data=ind.dat)
summary(ev.3.i.1sd)

###Adding lag drought and their interactions
ind.dat$lagPNegXIHF <- ind.dat$lagprec_shock_N*ind.dat$c_enop
ind.dat$lag2PNegXIHF <- ind.dat$lag2prec_shock_N*ind.dat$c_enop
ev.3.i.lagdrint <- felm(logviol ~  lag2prec_shock_N + lagprec_shock_N + prec_shock_N + c_enop + 
                          PNegXIHF + lagPNegXIHF + lag2PNegXIHF + logNLMean + 
                          laglogviol + STreserved + SCreserved + tempNew +
                          t+t2|fid2|0|fid2,
                        data=ind.dat)
summary(ev.3.i.lagdrint)


#########################
###Electoral Districts###
#########################

###Only one district electoral districts
#Subset data
ind.dat.nf <- subset(ind.dat, nofed==1)
#Run model
ev.3.i.nf2 <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean +
                    laglogviol + STreserved + SCreserved + tempNew +
                    t+t2|fid2|0|fid2,
                  data=ind.dat.nf)
summary(ev.3.i.nf2)

###Omit northeast
ind.dat.el <- subset(ind.dat, NE==0)
#Run model
ev.3.i.ne <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                    laglogviol + STreserved + SCreserved + tempNew +
                    t+t2|fid2|0|fid2,
                  data=ind.dat.el)
summary(ev.3.i.ne)

###Drop UT
#Subset data
ind.dat.ut <- subset(ind.dat, UT==0)
#Run model
ev.3.i.ut <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean +
                    laglogviol + STreserved + SCreserved + tempNew +
                    t+t2|fid2|0|fid2,
                  data=ind.dat.ut)
summary(ev.3.i.ut)

###Drop Both
#Subset data
ind.dat.neut <- subset(ind.dat.ut, NE==0)
#Run model
ev.3.i.neut <-  felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                       laglogviol + STreserved + SCreserved + tempNew +
                       t+t2|fid2|0|fid2,
                     data=ind.dat.neut)
summary(ev.3.i.neut)


#######################################################
###GMM Models for Endogeneity and Serial Correlation###
#######################################################

#Convert data to pgmm format
m.pgmm <- pdata.frame(ind.dat, index=c("fid2", "year"))

#set seed
set.seed(50)

##System GMM, twostep SEs
##Short lags
gmm.3.s <- pgmm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                  laglogviol + STreserved + SCreserved + tempNew | 
                  lag(logviol, 2:4),
                data = m.pgmm, effect = "individual", transformation= "ld", model = "twostep")
summary(gmm.3.s)
#R^2
# Getting the actual Y values out.
Y <- c()
for( i in 1:length(gmm.3.s$model)){ Y <- c(Y,gmm.3.s$model[[i]][,1])}
# Note that the fitted-values are exactly what you need.
Yhat <- gmm.3.s$fitted.values[1:length(gmm.3.s$fitted.values)]
# Squared correlation of fitted values and actual values
cor(Y,Yhat)^2

##Deep lags
gmm.3 <- pgmm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                laglogviol + STreserved + SCreserved + tempNew | 
                lag(logviol, 2:99),
              data = m.pgmm, effect = "individual", transformation= "ld", model = "twostep")
summary(gmm.3)
#R^2
# Getting the actual Y values out.
Y <- c()
for( i in 1:length(gmm.3$model)){ Y <- c(Y,gmm.3$model[[i]][,1])}
# Note that the fitted-values are exactly what you need.
Yhat <- gmm.3$fitted.values[1:length(gmm.3$fitted.values)]
# Squared correlation of fitted values and actual values
cor(Y,Yhat)^2


#########################
###Temporal Dependence###
#########################

##Linear time 
ev.3.i.t <- felm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                   laglogviol + STreserved + SCreserved + tempNew +
                   t|fid2|0|fid2,
                 data=ind.dat)
summary(ev.3.i.t)

##T2 interaction with district FEs
ev.3.i.t2inter <- lm(logviol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                       laglogviol + STreserved + SCreserved + tempNew+t+t2+factor(fid2)*t2 + cluster(fid2),
                     data=ind.dat)
summary(ev.3.i.t2inter)

##Count model
nb.3.i <- glm(viol ~ prec_shock_N + c_enop + PNegXIHF + logNLMean + 
                lagviol + STreserved + SCreserved + tempNew +
                t+t2+factor(fid2)+cluster(fid2), family="poisson",
              data=ind.dat)
summary(nb.3.i)

##Export the models to LaTex
#Table A.2
stargazer(ev.3.i.pop,ev.3.i.nf,ev.3.i.h,ev.3.i.norm,ev.3.i.cra) 
#Table A.3      
stargazer(ev.3.i.lag.mean,ev.3.i.lag.max,ev.3.i.lag.min,ev.3.i.hc,ev.3.i.1sd,ev.3.i.lagdrint)
#Table A.4
stargazer(ev.3.i.nf2,ev.3.i.ne,ev.3.i.ut,ev.3.i.neut)
#Table A.5
stargazer(gmm.3.s,gmm.3,ev.3.i.t,ev.3.i.t2inter, nb.3.i)

###################
###################
###Summary Stats###
###################
###################
#Social conflict (log)
c(min(ind.dat$logviol, na.rm=TRUE),median(ind.dat$logviol, na.rm=TRUE),mean(ind.dat$logviol, na.rm=TRUE),max(ind.dat$logviol, na.rm=TRUE),sd(ind.dat$logviol, na.rm=TRUE))
#Hate crimes (log)
c(min(ind.dat.s$logcivilrights_act, na.rm=TRUE),median(ind.dat.s$logcivilrights_act, na.rm=TRUE) ,mean(ind.dat.s$logcivilrights_act, na.rm=TRUE),max(ind.dat.s$logcivilrights_act, na.rm=TRUE),sd(ind.dat.s$logcivilrights_act, na.rm=TRUE))
#Drought
c(min(ind.dat$prec_shock_N, na.rm=TRUE),median(ind.dat$prec_shock_N, na.rm=TRUE) ,mean(ind.dat$prec_shock_N, na.rm=TRUE),max(ind.dat$prec_shock_N, na.rm=TRUE),sd(ind.dat$prec_shock_N, na.rm=TRUE))
#Electoral competition
c(min(ind.dat$c_enop, na.rm=TRUE),median(ind.dat$c_enop, na.rm=TRUE) ,mean(ind.dat$c_enop, na.rm=TRUE),max(ind.dat$c_enop, na.rm=TRUE),sd(ind.dat$c_enop, na.rm=TRUE))
#Nighttime light log
c(min(ind.dat$logNLMean, na.rm=TRUE),median(ind.dat$logNLMean, na.rm=TRUE) ,mean(ind.dat$logNLMean, na.rm=TRUE),max(ind.dat$logNLMean, na.rm=TRUE),sd(ind.dat$logNLMean, na.rm=TRUE))
#Temperature
c(min(ind.dat$tempNew, na.rm=TRUE),median(ind.dat$tempNew, na.rm=TRUE) ,mean(ind.dat$tempNew, na.rm=TRUE),max(ind.dat$tempNew, na.rm=TRUE),sd(ind.dat$tempNew, na.rm=TRUE))
#Reserve seats caste
c(min(ind.dat$SCreserved, na.rm=TRUE),median(ind.dat$SCreserved, na.rm=TRUE) ,mean(ind.dat$SCreserved, na.rm=TRUE),max(ind.dat$SCreserved, na.rm=TRUE),sd(ind.dat$SCreserved, na.rm=TRUE))
#Reserve tribe caste
c(min(ind.dat$STreserved, na.rm=TRUE),median(ind.dat$STreserved, na.rm=TRUE) ,mean(ind.dat$STreserved, na.rm=TRUE),max(ind.dat$STreserved, na.rm=TRUE),sd(ind.dat$STreserved, na.rm=TRUE))
#Lag DV
c(min(ind.dat.2$laglogviol, na.rm=TRUE),median(ind.dat.2$laglogviol, na.rm=TRUE) ,mean(ind.dat.2$laglogviol, na.rm=TRUE),max(ind.dat.2$laglogviol, na.rm=TRUE),sd(ind.dat.2$laglogviol, na.rm=TRUE))
#Population log
c(min(ind.dat$logpopseries, na.rm=TRUE),median(ind.dat$logpopseries, na.rm=TRUE) ,mean(ind.dat$logpopseries, na.rm=TRUE),max(ind.dat$logpopseries, na.rm=TRUE),sd(ind.dat$logpopseries, na.rm=TRUE))
#Border distance log
c(min(ind.dat$logdistborder, na.rm=TRUE),median(ind.dat$logdistborder, na.rm=TRUE) ,mean(ind.dat$logdistborder, na.rm=TRUE),max(ind.dat$logdistborder, na.rm=TRUE),sd(ind.dat$logdistborder, na.rm=TRUE))
#Elevation log
c(min(ind.dat$logmeanelevation, na.rm=TRUE),median(ind.dat$logmeanelevation, na.rm=TRUE),mean(ind.dat$logmeanelevation, na.rm=TRUE),max(ind.dat$logmeanelevation, na.rm=TRUE),sd(ind.dat$logmeanelevation, na.rm=TRUE))
#Distance to capital log
c(min(ind.dat$logdistcapital, na.rm=TRUE),median(ind.dat$logdistcapital, na.rm=TRUE) ,mean(ind.dat$logdistcapital, na.rm=TRUE),max(ind.dat$logdistcapital, na.rm=TRUE),sd(ind.dat$logdistcapital, na.rm=TRUE))
#Percent caste
c(min(ind.dat$caste_per, na.rm=TRUE),median(ind.dat$caste_per, na.rm=TRUE) ,mean(ind.dat$caste_per, na.rm=TRUE),max(ind.dat$caste_per, na.rm=TRUE),sd(ind.dat$caste_per, na.rm=TRUE))
#Percent tribe
c(min(ind.dat$tribe_per, na.rm=TRUE),median(ind.dat$tribe_per, na.rm=TRUE) ,mean(ind.dat$tribe_per, na.rm=TRUE),max(ind.dat$tribe_per, na.rm=TRUE),sd(ind.dat$tribe_per, na.rm=TRUE))
#Spatial lag - mean
c(min(ind.dat$n_mean_viol, na.rm=TRUE),median(ind.dat$n_mean_viol, na.rm=TRUE) ,mean(ind.dat$n_mean_viol, na.rm=TRUE),max(ind.dat$n_mean_viol, na.rm=TRUE),sd(ind.dat$n_mean_viol, na.rm=TRUE))
#Spatial lag - max
c(min(ind.dat$n_max_viol, na.rm=TRUE),median(ind.dat$n_max_viol, na.rm=TRUE) ,mean(ind.dat$n_max_viol, na.rm=TRUE),max(ind.dat$n_max_viol, na.rm=TRUE),sd(ind.dat$n_max_viol, na.rm=TRUE))
#Spatial lag - min
c(min(ind.dat$n_min_viol, na.rm=TRUE),median(ind.dat$n_min_viol, na.rm=TRUE) ,mean(ind.dat$n_min_viol, na.rm=TRUE),max(ind.dat$n_min_viol, na.rm=TRUE),sd(ind.dat$n_min_viol, na.rm=TRUE))
#Historical party shares
c(min(ind.dat$histm_enop, na.rm=TRUE),median(ind.dat$histm_enop, na.rm=TRUE) ,mean(ind.dat$histm_enop, na.rm=TRUE),max(ind.dat$histm_enop, na.rm=TRUE),sd(ind.dat$histm_enop, na.rm=TRUE))
#High competition
c(min(ind.dat$high_comp, na.rm=TRUE),median(ind.dat$high_comp, na.rm=TRUE) ,mean(ind.dat$high_comp, na.rm=TRUE),max(ind.dat$high_comp, na.rm=TRUE),sd(ind.dat$high_comp, na.rm=TRUE))
#Lower drought threshold
c(min(ind.dat$PNeg1sd, na.rm=TRUE),median(ind.dat$PNeg1sd, na.rm=TRUE) ,mean(ind.dat$PNeg1sd, na.rm=TRUE),max(ind.dat$PNeg1sd, na.rm=TRUE),sd(ind.dat$PNeg1sd, na.rm=TRUE))
#Drought t-1
c(min(ind.dat$lagprec_shock_N, na.rm=TRUE),median(ind.dat$lagprec_shock_N, na.rm=TRUE) ,mean(ind.dat$lagprec_shock_N, na.rm=TRUE),max(ind.dat$lagprec_shock_N, na.rm=TRUE),sd(ind.dat$lagprec_shock_N, na.rm=TRUE))
#Drought t-2
c(min(ind.dat$lag2prec_shock_N, na.rm=TRUE),median(ind.dat$lag2prec_shock_N, na.rm=TRUE) ,mean(ind.dat$lag2prec_shock_N, na.rm=TRUE),max(ind.dat$lag2prec_shock_N, na.rm=TRUE),sd(ind.dat$lag2prec_shock_N, na.rm=TRUE))

##Plot histogram of the DV
#Create an indicator of zero cases removed
zero_conf <- subset(ind.dat, viol>0)
zero_conf <- zero_conf[,c("viol")]
##Plot
#All sample
jpeg("hist.jpeg", width = 6, height = 6, units = 'in', res = 500)
qplot(ind.dat$viol,
      geom="histogram",
      binwidth = 0.5,  
      # main = "Histogram for Social Conflict_t", 
      xlab = "Number of incidents",  
      ylab = "Frequency in sample",
      fill=I("darkred"))
dev.off()
#Zero incidents removed
jpeg("histnz.jpeg", width = 6, height = 6, units = 'in', res = 500)
qplot(zero_conf,
      geom="histogram",
      binwidth = 0.5,  
      # main = "Histogram for Social Conflict_t (zero cases removed)", 
      xlim=c(1,74),
      xlab = "Number of incidents",  
      ylab = "Frequency in sample",
      fill=I("darkred"))
dev.off()

###LOWESS PLOTS
b <- na.omit(ind.dat[,c("logviol","precNew","c_enop")])
mod.lo <- stats::loess(logviol ~ precNew + c_enop, span=.5, degree=1, data=b)
#Plot predicted estimates
drought <- with(b, seq(min(precNew), max(precNew), len=25))
enop <- with(b, seq(min(c_enop), max(c_enop), len=25))
new.data <- expand.grid(precNew=drought, c_enop=enop)
#Combine data
fit.deaths <- matrix(predict(mod.lo, new.data), 25, 25, 25)
#Plot
jpeg("cor4.jpeg", width = 6, height = 6, units = 'in', res = 500)
persp(drought, enop, fit.deaths, theta=45, phi=30, ticktype = "detailed",
      xlab="Average Precipitation", ylab= "Dem. Compete.", zlab="Social Conflict",
      # main="Food Insecurity, Water Insecurity, \nand Social Unrest",
      shade =0.5,col = "red")
dev.off()

######################
######################
###Mediation Models###
######################
######################
##Keep all variables without NAs for adequate comparison
ind.dat.na <- na.omit(ind.dat[,c("logviol","prec_shock_N","c_enop","logNLMean","laglogviol","STreserved","SCreserved","tempNew","t","t2","fid2")])
#Run effect of exogenous variable on mediator
ev.3.a <- felm(c_enop ~ prec_shock_N + logNLMean + 
                 laglogviol + STreserved + SCreserved + tempNew +
                 t+t2|fid2|0|fid2,
               data=ind.dat.na)
summary(ev.3.a)
#Run effect of both exogenous variable and mediator on violence to show robustness of effect happening only through mediator
ev.3.b <- felm(logviol ~ prec_shock_N + c_enop + logNLMean + 
                 laglogviol + STreserved + SCreserved + tempNew +
                 t+t2|fid2|0|fid2,
               data=ind.dat.na)
summary(ev.3.b)
#Export to LaTex
stargazer(ev.3.a,ev.3.b)
